loads libraries
library(here)
library(rjags)
library(ggplot2)
library(tidyverse)
library(gNanoPkg)
Initialise data
gNano.df = readData()
Specify where the results are stored relative to the gNano project and this file.
jamesDuncan = TRUE
resultsRoot = if(jamesDuncan){
"../systematic/"
}else{
"../gNanoPkg/systematic/"
}
Model \(g_0\)
Model: \[ y_{c,l,a} \sim \Gamma(r,s) \] If \[ \mathrm{E}[y_{c,l,a}]) = \log(\mu)\mbox{ and }\mathrm{Var}[y_{c,l,a}] = \sigma^2 \] then we let \[ r = \frac{\mu^2}{\sigma^2}\mbox{ and }s = \frac{\mu}{\sigma^2} \] Our prior on \(\sigma^2\) is \(inverse-\Gamma(0.001, 0.001)\), and our prior on \(\log(\mu)\) is \(N(0, 10^6)\).
First I will load up the results.
g0.df = loadResults("g-0", resultsRoot = resultsRoot)
I’ll do this with ggplot2 so I can teach myself something
p = g0.df %>%
ggplot(aes(x = tau)) +
geom_density()
p
We want plots of shape and rate.
p = g0.df %>%
ggplot(aes(x = shape))+
geom_density()
p
p = g0.df %>%
ggplot(aes(x = rate))+
geom_density()
p
rateDensity = density(g0.df$rate)
shapeDensity = density(g0.df$shape)
rateMode = rateDensity$x[which.max(rateDensity$y)]
shapeMode = shapeDensity$x[which.max(shapeDensity$y)]
density.df = data.frame(x = seq(1, 30000, 1)) %>%
mutate(y = dgamma(x, rate = rateMode, shape = shapeMode))
p = gNano.df %>%
ggplot(aes(x = obs, y = stat(density))) +
geom_histogram(binwidth = 400) +
geom_line(data = density.df, aes(x, y))
p
Plot the expected vs the observed peak heights
obsExpectPlot(g0.df, "g")
Model \(g_1\)
Let’s have a look at the profile effects. I like to use error bar plots
g1.df = loadResults("g-1", resultsRoot = resultsRoot)
This code is not elegant, and is probably stupid but then so is the tidyverse a lot of the time :-)
effectsPlot(g1.df, "profile")
aph = gNano.df %>%
group_by(prof) %>%
summarise(aph = mean(log(obs), na.rm = TRUE)) %>%
pull(aph)
meanEffect = g1.df %>%
select(starts_with("beta.profile")) %>%
summarise_all(mean, na.rm = TRUE) %>%
unlist()
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>%
ggplot(aes(x = aph, y = meanEffect)) +
geom_point(show.legend = FALSE) +
geom_smooth() +
xlab(expression(log[10]~(aph))) +
ylab(bquote(mean~tau[c])) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Plot the expected vs the observed peak heights
obsExpectPlot(g1.df, "g")
and a pred-res plot
obsExpectPlot(g1.df, "g", predRes = TRUE)
Model \(g_2\)
g2.df = loadResults("g-2", resultsRoot = resultsRoot)
effectsPlot(g2.df, "locus")
Plot the expected vs observed peak heights
obsExpectPlot(g2.df, "g")
## Warning: Removed 348 rows containing non-finite values (stat_smooth).
and the pred-res plot
obsExpectPlot(g2.df, "g", TRUE)
Plot the locus effects by Average Peak Height:
aph = gNano.df %>%
group_by(loc) %>%
summarise(aph = mean(log(obs), na.rm = TRUE)) %>%
pull(aph)
meanEffect = g2.df %>%
select(starts_with("alpha.locus")) %>%
summarise_all(mean, na.rm = TRUE) %>%
unlist()
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>%
ggplot(aes(x = aph, y = meanEffect)) +
geom_point(show.legend = FALSE) +
geom_smooth() +
xlab(expression(log[10]~(aph))) +
ylab(bquote(mean~alpha[l])) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Model \(g_2\)
Dye effects
g3.df = loadResults("g-3", resultsRoot = resultsRoot)
effectsPlot(g3.df, "dye")
Plot the dye effects by Average Peak Height - no fitted line here because with four points who cares:
aph = gNano.df %>%
group_by(dye) %>%
summarise(aph = mean(log(obs), na.rm = TRUE)) %>%
pull(aph)
meanEffect = g3.df %>%
select(starts_with("gamma.dye")) %>%
summarise_all(mean, na.rm = TRUE) %>%
unlist()
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>%
ggplot(aes(x = aph, y = meanEffect)) +
geom_point(show.legend = FALSE) +
geom_smooth() +
xlab(expression(log[10]~(aph))) +
ylab(bquote(mean~delta[f])) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 3.296
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.095418
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.036101
## Warning in sqrt(sum.squares/one.delta): NaNs produced
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small.
## fewer data values than degrees of freedom.
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 3.296
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.095418
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.036101
## Warning in stats::qt(level/2 + 0.5, pred$df): NaNs produced
Plot the expected vs observed peak heights
obsExpectPlot(g3.df, "g")
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning: Removed 569 rows containing non-finite values (stat_smooth).
and the pred-res plot
g3.predRes = obsExpectPlot(g3.df, "g", TRUE)
Is the added variance being used?
g4.df = loadResults("g-4", resultsRoot = resultsRoot)
g4.df = g4.df %>%
mutate(sigma.sq0 = 1 / tau0)
p = g4.df %>%
ggplot(aes(x = sigma.sq0/10^6)) +
geom_density() +
geom_vline(xintercept = quantile(g4.df$sigma.sq0/10^6, probs=0.025)) +
geom_vline(xintercept = quantile(g4.df$sigma.sq0/10^6, probs=0.975)) +
xlab(bquote(sigma[0]^2~(x10^6))) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Plot the expected vs observed peak heights
obsExpectPlot(g4.df, "g")
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning: Removed 589 rows containing non-finite values (stat_smooth).
And the pred-res plot
g4.predRes = obsExpectPlot(g4.df, "g", TRUE)
Is it doing anything?
plot.df = data.frame(
expected.g3 = g3.predRes$plot.df$mean,
expected.g4 = g4.predRes$plot.df$mean
)
p = plot.df %>%
ggplot(aes(x = expected.g3, y = expected.g4)) +
geom_point() +
geom_abline(aes(intercept = 0, slope = 1)) +
xlim(1, 5) +
ylim(1, 5)
p
I think it’s doing what it is supposed to.
Let’s try and look at the expected values slightly differently. I will work with the summary stats rather than the raw data
simSummary = loadResults("g-3", resultsRoot = resultsRoot, summary = TRUE)
g3.quantiles = as.data.frame(t(simSummary$quantiles)) %>%
select(starts_with("pred")) %>%
rownames_to_column %>%
gather(var, value, -rowname) %>%
spread(rowname, value) %>%
mutate(var = as.numeric(gsub("^pred\\[([0-9]+)\\]$", "\\1", var))) %>%
arrange(var)
g3.stats = as.data.frame(t(simSummary$statistics)) %>%
select(starts_with("pred")) %>%
rownames_to_column %>%
gather(var, value, -rowname) %>%
spread(rowname, value) %>%
mutate(var = as.numeric(gsub("^pred\\[([0-9]+)\\]$", "\\1", var))) %>%
arrange(var)
simSummary = loadResults("g-4", resultsRoot = resultsRoot, summary = TRUE)
g4.quantiles = as.data.frame(t(simSummary$quantiles)) %>%
select(starts_with("pred")) %>%
rownames_to_column %>%
gather(var, value, -rowname) %>%
spread(rowname, value) %>%
mutate(var = as.numeric(gsub("^pred\\[([0-9]+)\\]$", "\\1", var))) %>%
arrange(var)
plot.df = bind_rows(g3.quantiles, g4.quantiles) %>%
mutate(model = rep(c("g3", "g4"), rep(nrow(g3.quantiles), 2))) %>%
mutate(obs = rep(gNano.df$obs, 2)) %>%
rename(med = `50%`, lb = `2.5%`, ub = `97.5%`) %>%
select(obs, model, lb, med, ub) %>%
arrange(obs)
p = plot.df %>%
ggplot(aes(x = log10(obs), y = log10(med))) +
facet_grid(vars(model)) +
geom_abline(aes(intercept = 0, slope = 1)) +
xlim(1, 5) +
ylim(1, 5) +
geom_point(show.legend = FALSE) +
geom_smooth() +
geom_line(aes(y = log10(lb)), col = "green") + geom_line(aes(y = log10(ub)), col = "red")
p
## Warning: Removed 38 rows containing non-finite values (stat_smooth).
## Warning: Removed 38 rows containing missing values (geom_point).
## Warning: Removed 26 rows containing missing values (geom_path).
Now we move on the the log normal models
Tau
ln0.df = loadResults("ln-0", resultsRoot = resultsRoot)
p = ln0.df %>%
ggplot(aes(x = tau)) +
geom_density()
p
Mu
p = ln0.df %>%
ggplot(aes(x = Mu)) +
geom_density()
p
Comparing expected and observed peak heights
obsExpectPlot(ln0.df, "ln")
and the pred-res plot
obsExpectPlot(ln0.df, "ln", TRUE)
Load the data
ln1.df = loadResults("ln-1", resultsRoot = resultsRoot)
Showing the per profile effects
effectsPlot(ln1.df, "profile")
Now showing the relationship between profile aph and profile effects from model
aph = gNano.df %>%
group_by(prof) %>%
summarise(aph = mean(log(obs), na.rm = TRUE)) %>%
pull(aph)
meanEffect = ln1.df %>%
select(starts_with("beta.profile")) %>%
summarise_all(mean, na.rm = TRUE) %>%
unlist()
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>%
ggplot(aes(x = aph, y = meanEffect)) +
geom_point(show.legend = FALSE) +
geom_smooth() +
xlab(expression(log[10]~(aph))) +
ylab(bquote(mean~tau[c])) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Plotting observed vs expected peak heights
obsExpectPlot(ln1.df, "ln")
and the pred-res plot
obsExpectPlot(ln1.df, "ln", TRUE)
## Model \(ln_2\) - locus effects
ln2.df = loadResults("ln-2", resultsRoot = resultsRoot)
effectsPlot(ln2.df, "locus")
Plot the expected vs observed peak heights
obsExpectPlot(ln2.df, "ln")
and the pred-res
obsExpectPlot(ln2.df, "ln", TRUE)
Plot the locus effects by Average Peak Height:
aph = gNano.df %>%
group_by(loc) %>%
summarise(aph = mean(log(obs), na.rm = TRUE)) %>%
pull(aph)
meanEffect = ln2.df %>%
select(starts_with("alpha.locus")) %>%
summarise_all(mean, na.rm = TRUE) %>%
unlist()
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>%
ggplot(aes(x = aph, y = meanEffect)) +
geom_point(show.legend = FALSE) +
geom_smooth() +
xlab(expression(log[10]~(aph))) +
ylab(bquote(mean~alpha[l])) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Dye effects
ln3.df = loadResults("ln-3", resultsRoot = resultsRoot)
effectsPlot(ln3.df, "dye")
Plot the dye effects by Average Peak Height - no fitted line here because with four points who cares:
aph = gNano.df %>%
group_by(dye) %>%
summarise(aph = mean(log(obs), na.rm = TRUE)) %>%
pull(aph)
meanEffect = ln3.df %>%
select(starts_with("gamma.dye")) %>%
summarise_all(mean, na.rm = TRUE) %>%
unlist()
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>%
ggplot(aes(x = aph, y = meanEffect)) +
geom_point(show.legend = FALSE) +
geom_smooth() +
xlab(expression(log[10]~(aph))) +
ylab(bquote(mean~delta[f])) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 3.296
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.095418
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.036101
## Warning in sqrt(sum.squares/one.delta): NaNs produced
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small.
## fewer data values than degrees of freedom.
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 3.296
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.095418
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.036101
## Warning in stats::qt(level/2 + 0.5, pred$df): NaNs produced
Plot the expected vs observed peak heights
obsExpectPlot(ln3.df, "ln")
and the pred-res
obsExpectPlot(ln3.df, "ln", TRUE)
Is the added variance being used?
ln4.df = loadResults("ln-4", resultsRoot = resultsRoot)
ln4.df = ln4.df %>%
mutate(sigma.sq0 = 1 / tau0)
p = ln4.df %>%
ggplot(aes(x = sigma.sq0)) +
geom_density() +
geom_vline(xintercept = quantile(ln4.df$sigma.sq0, probs=0.025)) +
geom_vline(xintercept = quantile(ln4.df$sigma.sq0, probs=0.975)) +
xlab(bquote(sigma[0]^2)) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Plot the expected vs observed peak heights
obsExpectPlot(ln4.df, "ln")
And the pred-res
obsExpectPlot(ln4.df, "ln", TRUE)
Compare the dlog(likelihoods) for the log-normal model 4 vs the gamma model 4
ln4.ll = calcLogLik(ln4.df, "ln")
g4.ll = calcLogLik(g4.df, "g")
compareLL(ln4.ll, g4.ll, labels = c("LN-4", "G-4"))
Comparing the g3 model and the g4 model
g3.ll = calcLogLik(g3.df, "g")
compareLL(g3.ll, g4.ll, labels = c("G-3", "G-4"))
Compare the ln3 and ln4 models
ln3.ll = calcLogLik(ln3.df, "ln")
## Warning: NAs introduced by coercion
compareLL(ln3.ll, ln4.ll, labels = c("LN-3", "LN-4"))
Compare all the ln models at once:
ln0.ll = calcLogLik(ln0.df, "ln")
ln1.ll = calcLogLik(ln1.df, "ln")
## Warning: NAs introduced by coercion
ln2.ll = calcLogLik(ln2.df, "ln")
## Warning: NAs introduced by coercion
compareLL(ln0.ll, ln1.ll, ln2.ll, ln3.ll, ln4.ll,
labels = paste0("LN-", 0:4))
LN-0 kind of distorts this so, let’s drop it.
compareLL(ln1.ll, ln2.ll, ln3.ll, ln4.ll,
labels = paste0("LN-", 1:4))
So no real value in the variance model. Definite gains in adding in dye and locus effects (and profile, but we don’t care about those).
ln5.df = loadResults("ln-5", resultsRoot = resultsRoot)
effectsPlot(ln5.df, "profile")
So this is interesting in that all the profile effects are non-zero. Let’s just look at ln4
effectsPlot(ln4.df, "profile")
effectsPlot(ln5.df, "locus")
Hmm - similar
effectsPlot(ln5.df, "dye")
And this too - all positive
obsExpectPlot(ln5.df, "ln")
No real change here in terms of predicted values
obsExpectPlot(ln5.df, "ln", TRUE)
What about change to the log-likelihood?
ln5.ll = calcLogLik(ln5.df, "ln")
compareLL(ln4.ll, ln5.ll, labels = c("LN-4", "LN-5"))
I think we’d say realistically no. So are the sum constraints being swallowed in the mean?
p = ln5.df %>%
ggplot(aes(x = Mu)) +
geom_density()
p
And I’d say the answer is “yes.” I think we should probably just stick with the constraints. I’m happier with them being there than not, and they make more sense.